


library(tidysynth) #Running and manipulating synthetic control model results
library(tidyverse) #Load tidyverse for piping and ggplot edits
library(readit)    #Loading in outside data formats

#Loading Data


complete_perc_synth_results <- readRDS("Data/Results/complete_synth_results.rds")

dose1_perc_synth_results <- readRDS("Data/Results/dose1_synth_results.rds")

treatment_dates <- readRDS("Data/treatment_dates.rds")

treated_vaccine_data <- readRDS("Data/treated_vaccine_data.rds")

pres_vote_dat <- readit("Data/1868_2020_presvote.dta")


#### Summary Tables, Including Mean Squared Prediction Error (MSPE), Pre- and Post-Treatment ####

##NOTE: Low pre-treatment MSPE and high post-treatment MSPE imply that the treatment had a significant effect##
##This can also be assessed by looking at "mspe_ratio", with values close to 1 implying no effect and larger values implying a statistically significant effect##
## See Abadie et al. 2010 for more information on the MSPE Ratio. ##
## We can also plot graphs of the MSPE Ratio using "plot_mspe_ratio", see below ##


complete_perc_mspe <- NA

for(i in names(complete_perc_synth_results[[1]])){
  #print(i)
  hold <- grab_signficance(complete_perc_synth_results[[1]][[i]])
  
  complete_perc_mspe <- rbind.data.frame(complete_perc_mspe,hold[hold$unit_name == i,])
  #print(hold[hold$unit_name == i,])
}
complete_perc_mspe <- complete_perc_mspe[-1,]

complete_perc_mspe$stat.imp <- NA

complete_perc_mspe$stat.imp[complete_perc_mspe$fishers_exact_pvalue < .1] <- T

complete_perc_mspe$stat.imp[complete_perc_mspe$fishers_exact_pvalue > .1] <- F


dose1_perc_mspe <- NA

for(i in names(dose1_perc_synth_results[[1]])){
  #print(i)
  hold <- grab_signficance(dose1_perc_synth_results[[1]][[i]])
  
  dose1_perc_mspe <- rbind.data.frame(dose1_perc_mspe,hold[hold$unit_name == i,])
  #print(hold[hold$unit_name == i,])
}
dose1_perc_mspe <- dose1_perc_mspe[-1,]

dose1_perc_mspe$stat.imp <- NA

dose1_perc_mspe$stat.imp[dose1_perc_mspe$fishers_exact_pvalue < .1] <- T

dose1_perc_mspe$stat.imp[dose1_perc_mspe$fishers_exact_pvalue > .1] <- F


### All Dataframes of MSPE ###

complete_perc_mspe

dose1_perc_mspe


### MSPE Plots ###

for(i in names(complete_perc_synth_results[[1]])){
  print(i)
  print(plot_mspe_ratio(complete_perc_synth_results[[1]][[i]]))
}

for(i in names(dose1_perc_synth_results[[1]])){
  print(i)
  print(plot_mspe_ratio(dose1_perc_synth_results[[1]][[i]]))
}


### Creating MSPE Tables ###

complete_perc_mspe_table <- complete_perc_mspe[,c(-2,-6,-9)]

complete_perc_mspe_table[,1] <- full.state.names$full

complete_perc_mspe_table[,2:6] <- round(complete_perc_mspe_table[,2:6],digits=4)

complete_perc_mspe_table


dose1_perc_mspe_table <- dose1_perc_mspe[,c(-2,-6,-9)]

dose1_perc_mspe_table[,1] <- full.state.names$full

dose1_perc_mspe_table[,2:6] <- round(dose1_perc_mspe_table[,2:6],digits=4)

ose1_perc_mspe_table


#### Time-Averaged Daily Differences Plots ####

complete_perc_TADD <- data.frame(state = names(complete_perc_synth_results[[1]]), difference = NA, low.ci = NA, high.ci = NA, p.value = NA, label = NA)

dose1_perc_TADD <- data.frame(state = names(complete_perc_synth_results[[1]]), difference = NA, low.ci = NA, high.ci = NA, p.value = NA, label = NA)

dat_plot_complete_perc <- data.frame(state = NA, difference = NA, time_unit = NA)
dat_plot_dose1_perc <- data.frame(state = NA, difference = NA, time_unit = NA)


for(i in names(complete_perc_synth_results[[1]])){
  hold <- complete_perc_synth_results[[1]][[i]] %>% 
    grab_synthetic_control() %>% 
    pivot_longer(!time_unit, names_to = "type", values_to = "estimate")
  hold <- hold[hold$time_unit > treatment_dates$treatment.date[treatment_dates$state == i],]
  test <- t.test(estimate ~ type, data = hold, paired = T)
  
  complete_perc_TADD$difference[complete_perc_TADD$state == i] <- test$estimate
  complete_perc_TADD$low.ci[complete_perc_TADD$state == i] <- test$conf.int[1]
  complete_perc_TADD$high.ci[complete_perc_TADD$state == i] <- test$conf.int[2]
  complete_perc_TADD$p.value[complete_perc_TADD$state == i] <- test$p.value
  complete_perc_TADD$label[complete_perc_TADD$state == i] <- if(test$p.value < 0.01) paste0(round(test$estimate,digits = 2),"***") else 
    if(test$p.value < 0.05) paste0(round(test$estimate,digits = 2),"**") else 
      if(test$p.value < 0.1) paste0(round(test$estimate,digits = 2),"*") else 
        NA
  
  hold2 <- data.frame(state = rep(i,(NROW(hold)/2)), difference = (hold$estimate[hold$type == "real_y"] - hold$estimate[hold$type == "synth_y"]), time_unit = unique(hold$time_unit))
  
  dat_plot_complete_perc <- rbind.data.frame(dat_plot_complete_perc, hold2)
}

dat_plot_complete_perc <- dat_plot_complete_perc[-1,]



for(i in names(dose1_perc_synth_results[[1]])){
  hold <- dose1_perc_synth_results[[1]][[i]] %>% 
    grab_synthetic_control() %>% 
    pivot_longer(!time_unit, names_to = "type", values_to = "estimate")
  hold <- hold[hold$time_unit > treatment_dates$treatment.date[treatment_dates$state == i],]
  test <- t.test(estimate ~ type, data = hold, paired = T)
  
  dose1_perc_TADD$difference[complete_perc_TADD$state == i] <- test$estimate
  dose1_perc_TADD$low.ci[complete_perc_TADD$state == i] <- test$conf.int[1]
  dose1_perc_TADD$high.ci[complete_perc_TADD$state == i] <- test$conf.int[2]
  dose1_perc_TADD$p.value[complete_perc_TADD$state == i] <- test$p.value
  dose1_perc_TADD$label[complete_perc_TADD$state == i] <- if(test$p.value < 0.01) paste0(round(test$estimate,digits = 2),"***") else 
    if(test$p.value < 0.05) paste0(round(test$estimate,digits = 2),"**") else 
      if(test$p.value < 0.1) paste0(round(test$estimate,digits = 2),"*") else 
        NA
  
  hold2 <- data.frame(state = rep(i,(NROW(hold)/2)), difference = (hold$estimate[hold$type == "real_y"] - hold$estimate[hold$type == "synth_y"]), time_unit = unique(hold$time_unit))
  
  dat_plot_dose1_perc <- rbind.data.frame(dat_plot_dose1_perc, hold2)
}

dat_plot_dose1_perc <- dat_plot_dose1_perc[-1,]



### Plots and partisanship regression for red/blue/purple states ###

## Data Cleanup ##

complete_perc_TADD$dvote <- pres_vote_dat$dvote[pres_vote_dat$state %in% complete_perc_TADD$state & pres_vote_dat$year == 2020]

complete_perc_TADD$state.type <- NA

complete_perc_TADD$state.type[complete_perc_TADD$dvote > 48 & complete_perc_TADD$dvote < 52] <- "Purple"

complete_perc_TADD$state.type[complete_perc_TADD$dvote > 52] <- "Blue"

complete_perc_TADD$state.type[complete_perc_TADD$dvote < 48] <- "Red"


complete_perc_TADD <- cbind.data.frame(complete_perc_TADD, unique(treated_vaccine_data[,c("will_def_perc","will_prob_perc","unsure_perc","not_prob_perc","not_def_perc")]))


dose1_perc_TADD$dvote <- pres_vote_dat$dvote[pres_vote_dat$state %in% dose1_perc_TADD$state & pres_vote_dat$year == 2020]

dose1_perc_TADD$state.type <- NA

dose1_perc_TADD$state.type[dose1_perc_TADD$dvote > 48 & dose1_perc_TADD$dvote < 52] <- "Purple"

dose1_perc_TADD$state.type[dose1_perc_TADD$dvote > 52] <- "Blue"

dose1_perc_TADD$state.type[dose1_perc_TADD$dvote < 48] <- "Red"


dose1_perc_TADD <- cbind.data.frame(dose1_perc_TADD, unique(treated_vaccine_data[,c("will_def_perc","will_prob_perc","unsure_perc","not_prob_perc","not_def_perc")]))



##Partisanship, Lottery Value, and Hesitancy Regressions ##

complete_perc_TADD$lottery.value <- c(2000000,116500000,6250000,5000000,10000000,4200000,2300000,5500000,2000000,896809,5495000,9000000,4500000,10000000,5000000,NA,5600000,1500000,2400000,10000000)

dose1_perc_TADD$lottery.value <- c(2000000,116500000,6250000,5000000,10000000,4200000,2300000,5500000,2000000,896809,5495000,9000000,4500000,10000000,5000000,NA,5600000,1500000,2400000,10000000)

complete_perc_TADD$lottery.value.per.capita <- complete_perc_TADD$lottery.value/unique(treated_vaccine_data$raw_total_population[treated_vaccine_data$state %in% treatment_dates$state])

dose1_perc_TADD$lottery.value.per.capita <- dose1_perc_TADD$lottery.value/unique(treated_vaccine_data$raw_total_population[treated_vaccine_data$state %in% treatment_dates$state])

#Per 100k

complete_perc_TADD$lottery.value <- complete_perc_TADD$lottery.value/1000000

dose1_perc_TADD$lottery.value <- dose1_perc_TADD$lottery.value/1000000

complete_perc_TADD$hes <- complete_perc_TADD$unsure_perc/sum(c(complete_perc_TADD$not_prob_perc,complete_perc_TADD$not_def_perc))


library(stargazer)

reg1 <- lm(difference ~ dvote, data = dose1_perc_TADD[!dose1_perc_TADD$state %in% c("NM","ME"),])

summary(reg1)

reg2 <- lm(difference ~ dvote, data = complete_perc_TADD[!complete_perc_TADD$state %in% c("NM","ME"),])

summary(reg2)

summary(lm(difference ~ dvote + will_prob_perc, data = dose1_perc_TADD[!dose1_perc_TADD$state %in% c("NM","ME"),]))

reg_hes_1 <- lm(difference ~ unsure_perc, data = dose1_perc_TADD[!dose1_perc_TADD$state %in% c("NM","ME"),])
reg_hes_2 <- lm(difference ~ unsure_perc, data = complete_perc_TADD[!complete_perc_TADD$state %in% c("NM","ME"),])

reg_hes_1 <- lm(difference ~ hes, data = dose1_perc_TADD[!dose1_perc_TADD$state %in% c("NM","ME"),])
reg_hes_2 <- lm(difference ~ hes, data = complete_perc_TADD[!complete_perc_TADD$state %in% c("NM","ME"),])

reg_lotto_1 <- lm(difference ~ lottery.value, data = dose1_perc_TADD[!dose1_perc_TADD$state %in% c("NM","ME"),])
reg_lotto_2 <- lm(difference ~ lottery.value, data = complete_perc_TADD[!complete_perc_TADD$state %in% c("NM","ME"),])

reg_lotto_cap_1 <- lm(difference ~ lottery.value.per.capita, 
                      data = dose1_perc_TADD[!dose1_perc_TADD$state %in% c("NM","ME"),])
reg_lotto_cap_2 <- lm(difference ~ lottery.value.per.capita, 
                      data = complete_perc_TADD[!complete_perc_TADD$state %in% c("NM","ME"),])

reg_sat_1 <- lm(difference ~ dvote + unsure_perc + lottery.value.per.capita, 
                data = dose1_perc_TADD[!dose1_perc_TADD$state %in% c("NM","ME"),])
reg_sat_2 <- lm(difference ~ dvote+ unsure_perc + lottery.value.per.capita, 
                data = complete_perc_TADD[!complete_perc_TADD$state %in% c("NM","ME"),])

stargazer(reg2)

stargazer(reg_hes_1)
stargazer(reg_hes_2)

summary(reg_hes_1)

summary(reg_hes_2)

stargazer(reg_lotto_1)
stargazer(reg_lotto_2)

summary(reg_lotto_1)

stargazer(reg_lotto_cap_1)
stargazer(reg_lotto_cap_2)

stargazer(reg_sat_1)
stargazer(reg_sat_2)



####
summary(lm(difference ~ dvote + will_def_perc + will_prob_perc + unsure_perc + not_prob_perc, data = complete_perc_TADD[!complete_perc_TADD$state %in% c("NM","ME"),]))

summary(lm(difference ~ not_prob_perc, data = complete_perc_TADD[!complete_perc_TADD$state %in% c("NM","ME"),]))


summary(lm(difference ~ dvote + will_def_perc + will_prob_perc + unsure_perc + not_prob_perc, data = dose1_perc_TADD[!dose1_perc_TADD$state %in% c("NM","ME"),]))


summary(lm(difference ~ lottery.value.per.capita, data = dose1_perc_TADD[!dose1_perc_TADD$state %in% c("NM","ME"),]))
summary(lm(difference ~ lottery.value.per.capita, data = complete_perc_TADD[!complete_perc_TADD$state %in% c("NM","ME"),]))

summary(lm(difference ~ lottery.value, data = dose1_perc_TADD[!dose1_perc_TADD$state %in% c("NM","ME"),]))
summary(lm(difference ~ lottery.value, data = complete_perc_TADD[!complete_perc_TADD$state %in% c("NM","ME"),]))


## TADD Plots ##

state.label <- rep(NA, 20)


for(i in 1:NROW(complete_perc_TADD$state)){
 state.label[i] <- paste0(complete_perc_TADD$state[i], "\n(", complete_perc_TADD$dvote[i], "%)")
}

state.label[19] <- "WA\n(58.0%)"


complete_perc_TADD$state.label <- state.label

dose1_perc_TADD$state.label <- state.label


plot.complete.perc.imp <- ggplot(complete_perc_TADD[!complete_perc_TADD$state %in% c("NM","ME"),],
                                  aes(x=state.label,y=difference,factor=state.label,group=state.label,color=state.type)) + 
   coord_flip() + 
   geom_linerange(aes(x = state.label, ymin = low.ci, ymax = high.ci), 
                  position = position_dodge(width=0.75), lwd  = 1) + 
   geom_pointrange(aes(x= state.label, ymin = low.ci, ymax = high.ci), 
                   lwd = 1/2, position = position_dodge(width=0.75),fill="white") + 
   theme_minimal() + 
   scale_x_discrete("State by Biden Vote Share 2020", 
                    limits = unique(complete_perc_TADD$state.label[!complete_perc_TADD$state %in% c("NM","ME")][order(complete_perc_TADD$dvote[!complete_perc_TADD$state %in% c("NM","ME")])])) + 
   scale_y_continuous("Time Averaged Daily Differences\n(Treatment versus Synthetic Control)", breaks = seq(-2,2,1),limits = c(-2.5,2.5), minor_breaks = NULL) + 
   scale_color_manual(values = c("royalblue4","purple3","firebrick")) +
   geom_hline(yintercept = 0, colour = gray(1/2), lty = 2) + 
   theme(axis.text.x = element_text(hjust = 0.5),
         axis.text.y = element_text(hjust = 0.5),
         axis.title.y = element_text(margin = margin(t = 0, r = 15, b = 0, l = 0)),
         axis.title.x = element_text(margin = margin(t = 15, r = 0, b = 0, l = 0)),
         legend.position = "none") + 
   expand_limits(x = NROW(complete_perc_TADD[!complete_perc_TADD$state %in% c("NM","ME"),]) + 1, y = 2.6) #+
   #geom_label(vjust=-0.25,hjust=0.25,fill="white") + 
   #labs(caption="* p < 0.1; ** p < 0.05; *** p < 0.01", 
        #title = "Effect of Lottery on Complete Vaccination by State and Electoral Competitiveness (Percentage)")
 
 plot.complete.perc.imp
 
 ggsave("Figures/Analysis/effect_plot_complete_perc.pdf", plot = plot.complete.perc.imp, height = 6, width = 9, units = "in")
 
 ggsave("Figures/Analysis/effect_plot_dose1_100k.pdf", plot = plot.dose1.100k.imp, height = 6, width = 9, units = "in")
 
 
 plot.dose1.perc.imp <- ggplot(dose1_perc_TADD[!dose1_perc_TADD$state %in% c("NM","ME"),],
                               aes(x=state.label,y=difference,factor=state.label,group=state.label,color=state.type)) + 
   coord_flip() + 
   geom_linerange(aes(x = state.label, ymin = low.ci, ymax = high.ci), 
                  position = position_dodge(width=0.75), lwd  = 1) + 
   geom_pointrange(aes(x= state.label, ymin = low.ci, ymax = high.ci), 
                   lwd = 1/2, position = position_dodge(width=0.75),fill="white") + 
   theme_minimal() + 
   scale_x_discrete("State by Biden Vote Share 2020", 
                    limits = unique(dose1_perc_TADD$state.label[!dose1_perc_TADD$state %in% c("NM","ME")][order(dose1_perc_TADD$dvote[!dose1_perc_TADD$state %in% c("NM","ME")])])) + 
   scale_y_continuous("Time Averaged Daily Differences\n(Treatment versus Synthetic Control)", breaks = seq(-2,2,1),limits = c(-2.5,2.5), minor_breaks = NULL) + 
   scale_color_manual(values = c("royalblue4","purple3","firebrick")) +
   geom_hline(yintercept = 0, colour = gray(1/2), lty = 2) + 
   theme(axis.text.x = element_text(hjust = 0.5),
         axis.text.y = element_text(hjust = 0.5),
         axis.title.y = element_text(margin = margin(t = 0, r = 15, b = 0, l = 0)),
         axis.title.x = element_text(margin = margin(t = 15, r = 0, b = 0, l = 0)),
         legend.position = "none") + 
   expand_limits(x = NROW(dose1_perc_TADD[!dose1_perc_TADD$state %in% c("NM","ME"),]) + 1, y = 2.6) #+
   #geom_label(vjust=-0.25,hjust=0.25,fill="white") + 
   #labs(caption="* p < 0.1; ** p < 0.05; *** p < 0.01", 
        #title = "Effect of Lottery on First Dose Vaccination by State and Electoral Competitiveness (Percentage)")
 
 plot.dose1.perc.imp


 ggsave("Figures/Analysis/effect_plot_dose1_perc.pdf", plot = plot.dose1.perc.imp, height = 6, width = 9, units = "in")



#### Trend Plot of All States ####

full.state.names <- data.frame(abbr = dose1_perc_TADD$state, full = c("Arkansas","California","Colorado","Delaware","Illinois","Kentucky","Louisiana","Massachusetts","Maryland","Maine","Michigan","Missouri","North Carolina", "New Mexico", "Nevada", "New York", "Ohio", "Oregon", "Washington", "West Virginia"))
 
 
 for(i in unique(complete_perc_TADD$state)){
   hold <- complete_perc_synth_results[[2]][[i]] + 
     theme(plot.caption = element_text(hjust = 0.05, vjust = 225),
           axis.title.y = element_text(margin = margin(t = 0, r = 15, b = 0, l = 0)),
           axis.title.x = element_text(margin = margin(t = 0, r = 0, b = -10, l = 0))) +
     labs(x = "", 
          y = "Percentage of Completely Vaccinated Individuals", 
          title = paste0("Vaccination Trend Lines of Synthetic and Actual ",full.state.names$full[full.state.names$abbr == i]), 
          caption = paste0("Pre-Lottery MSPE = ",round(complete_perc_mspe$pre_mspe[complete_perc_mspe$unit_name == i], digits = 4)))
   ggsave(paste0("Figures/Trend_Plots/Complete_Perc/",i,"_complete_perc_trend_plot.pdf"), 
          plot = hold, height = 10, width = 10, units = "in")
 }

 
for(i in unique(dose1_perc_TADD$state)){
   hold <- dose1_perc_synth_results[[2]][[i]] + 
     theme(plot.caption = element_text(hjust = 0.05, vjust = 225),
           axis.title.y = element_text(margin = margin(t = 0, r = 15, b = 0, l = 0)),
           axis.title.x = element_text(margin = margin(t = 0, r = 0, b = -10, l = 0))) +
     labs(x = "", 
          y = "Percentage of First Dose Vaccinated Individuals", 
          title = paste0("Vaccination Trend Lines of Synthetic and Actual ",full.state.names$full[full.state.names$abbr == i]), 
          caption = paste0("Pre-Lottery MSPE = ",round(dose1_perc_mspe$pre_mspe[dose1_perc_mspe$unit_name == i], digits = 4))) 
   ggsave(paste0("Figures/Trend_Plots/Dose1_Perc/",i,"_dose1_perc_trend_plot.pdf"), 
          plot = hold, height = 10, width = 10, units = "in")
 }
 
 
 
#### Control weight plots for units and variables ####

for(i in unique(dose1_perc_TADD$state)){
  hold <- plot_weights(dose1_perc_synth_results[[1]][[i]]) + 
        labs(y = "Weight", 
         title = paste0("Synthetic Control Weights for ",
                        full.state.names$full[full.state.names$abbr == i]," (First Dose)"))
  
  ggsave(paste0("Figures/Weight_Plots/Dose1_Perc/",
                i,"_dose1_perc_weight_plot.pdf"), 
         plot = hold, height = 10, width = 10, units = "in")
}


for(i in unique(complete_perc_TADD$state)){
  hold <- plot_weights(complete_perc_synth_results[[1]][[i]]) + 
    labs(y = "Weight", 
         title = paste0("Synthetic Control Weights for ",
                        full.state.names$full[full.state.names$abbr == i], (" (Complete)")))
  
  ggsave(paste0("Figures/Weight_Plots/Complete_Perc/",
                i,"_complete_perc_weight_plot.pdf"), 
         plot = hold, height = 10, width = 10, units = "in")
}








